Data Visualization-III (Plotly)

In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings('ignore')
In [2]:
# Load the iris sample dataset shipped with seaborn, report its
# (rows, columns) shape and preview the first two records.
df = sns.load_dataset(name='iris')
print(df.shape)
df.head(n=2)
(150, 5)
Out[2]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
In [3]:
# Summarise the iris frame: column dtypes, non-null counts and memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
In [4]:
# List the column labels that the plots below refer to
df.columns
Out[4]:
Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

1. Scatter plot¶

In [5]:
# Sepal length against sepal width, one colour per species
fig1 = px.scatter(
    data_frame=df,
    x='sepal_length',
    y='sepal_width',
    color='species',
)
fig1.show()

# Keep an interactive copy of the figure as a standalone HTML page
fig1.write_html("outputs/fig1.html")

# Optional static-image export, left disabled:
# fig1.write_image("outputs/fig1.png", scale=3)

2. Line plot¶

In [6]:
# Order the rows by the x variable so each species' line runs left to right
df_line = df.sort_values(by='sepal_length')

fig2 = px.line(
    data_frame=df_line,
    x='sepal_length',
    y='sepal_width',
    color='species',
)
fig2.show()

# Save the figure as HTML
fig2.write_html("outputs/fig2.html")

3. Bar plot¶

In [8]:
# Average every measurement column per species, then turn the species index
# back into an ordinary column so it can be used as a plot axis
df_bar = (
    df.groupby(by='species')
      .mean()
      .reset_index()
)
df_bar.head()
Out[8]:
species sepal_length sepal_width petal_length petal_width
0 setosa 5.006 3.428 1.462 0.246
1 versicolor 5.936 2.770 4.260 1.326
2 virginica 6.588 2.974 5.552 2.026
In [9]:
# One bar per species showing its mean sepal width (taken from df_bar)
fig3 = px.bar(
    data_frame=df_bar,
    x='species',
    y='sepal_width',
)
fig3.show()

# Save the figure as HTML
fig3.write_html("outputs/fig3.html")
In [10]:
# seaborn aggregates the raw rows itself (the mean is barplot's default
# estimator), so no pre-grouped frame is needed here.
# The frame is passed as `data=` rather than positionally: seaborn releases
# before 0.12 take `x` as the first positional argument, so a positional
# frame would clash with the explicit `x=` keyword.
sns.barplot(data=df, x="species", y="sepal_width")
plt.show()

4. Boxplot¶

In [11]:
# Box plot: distribution of sepal width within each species
fig4 = px.box(
    data_frame=df,
    x='species',
    y='sepal_width',
    color='species',
)
fig4.show()

# Save the figure as HTML
fig4.write_html("outputs/fig4.html")

5. Violin plot¶

In [12]:
# Violin plot of sepal width per species; box=True draws a box plot
# inside each violin
fig5 = px.violin(
    data_frame=df,
    x='species',
    y='sepal_width',
    color='species',
    box=True,
)
fig5.show()

# Save the figure as HTML
fig5.write_html("outputs/fig5.html")

6. Histogram¶

In [13]:
# Histogram of sepal width, with the bars coloured by species
fig6 = px.histogram(
    data_frame=df,
    x='sepal_width',
    color='species',
)
fig6.show()

# Save the figure as HTML
fig6.write_html("outputs/fig6.html")

7. Pie Chart¶

In [14]:
# Check how many rows each species contributes before drawing the pie chart
df.loc[:, 'species'].value_counts()
Out[14]:
setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64
In [15]:
# Pie chart of the species shares.
# Count the rows per species and reshape the result into a two-column
# frame (species, count) that px.pie can read directly.
df_pie = (
    df['species']
    .value_counts()
    .rename_axis('species')
    .reset_index(name='count')
)
fig7 = px.pie(data_frame=df_pie, names='species', values='count')
fig7.show()

# Save the figure as HTML
fig7.write_html("outputs/fig7.html")

8. Scatter 3D plot¶

In [16]:
# 3D scatter: sepal length, sepal width and petal width on the three axes,
# coloured by species
fig8 = px.scatter_3d(
    data_frame=df,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    color='species',
)
fig8.show()

# Save the figure as HTML
fig8.write_html("outputs/fig8.html")

9. Area chart¶

In [17]:
# Area chart of sepal length over sepal width, one filled band per species.
# NOTE(review): the rows are ordered by sepal_length while the x axis is
# sepal_width -- confirm whether the sort key or the axes should be swapped.
df_area = df.sort_values(by='sepal_length')

fig9 = px.area(
    data_frame=df_area,
    x='sepal_width',
    y='sepal_length',
    color='species',
)
fig9.show()

# Save the figure as HTML
fig9.write_html("outputs/fig9.html")

10. Bubble Chart¶

In [18]:
# Bubble chart: a scatter plot whose marker size follows petal_length
fig10 = px.scatter(
    data_frame=df,
    x='sepal_length',
    y='sepal_width',
    size='petal_length',
    color='species',
)
fig10.show()

# Save the figure as HTML
fig10.write_html("outputs/fig10.html")

11. Sunburst chart¶

In [19]:
# Sunburst: inner ring = species, outer ring = petal width values.
# Count how many rows fall into each (species, petal_width) pair first;
# those counts size the wedges.
pair_sizes = df.groupby(['species', 'petal_width']).size()
df_sunburst = pair_sizes.reset_index(name='counts')

fig11 = px.sunburst(
    data_frame=df_sunburst,
    path=['species', 'petal_width'],
    values='counts',
)
fig11.show()

# Save the figure as HTML
fig11.write_html("outputs/fig11.html")

Let's try another sunburst plot

In [20]:
# Sunburst on the Titanic sample dataset from seaborn
titanic = sns.load_dataset(name='titanic')

# Ring order from the centre outwards: sex -> class -> who -> alive -> alone.
# Wedges are sized by the `survived` column and coloured by sex.
fig = px.sunburst(
    data_frame=titanic,
    path=['sex', 'class', 'who', 'alive', 'alone'],
    values='survived',
    color='sex',
)
fig.show()

# Save the figure as HTML
fig.write_html("outputs/figg.html")

12. Parallel Coordinates plot¶

In [21]:
# The colour argument is given a numeric column here, so encode each
# species name as an integer category code and store it on the frame
species_as_category = df['species'].astype('category')
df['species_id'] = species_as_category.cat.codes

# Parallel coordinates plot, coloured by the species code and labelled
# "species" in the figure
fig12 = px.parallel_coordinates(
    data_frame=df,
    color='species_id',
    labels={'species_id': 'species'},
    color_continuous_scale=px.colors.diverging.Tealrose,
)
fig12.show()

# Save the figure as HTML
fig12.write_html("outputs/fig12.html")

13. Density contour plot¶

In [22]:
# Density contour plot of sepal length against sepal width, one set of
# contours per species
fig13 = px.density_contour(
    data_frame=df,
    x='sepal_length',
    y='sepal_width',
    color='species',
)
fig13.show()

# Save the figure as HTML
fig13.write_html("outputs/fig13.html")

14. Ternary Plot¶

In [23]:
# Ternary plot: the three corners are sepal length (a), sepal width (b)
# and petal width (c); points are coloured by species
fig14 = px.scatter_ternary(
    data_frame=df,
    a='sepal_length',
    b='sepal_width',
    c='petal_width',
    color='species',
)
fig14.show()

# Save the figure as HTML
fig14.write_html("outputs/fig14.html")

15. Polar chart (Radar Chart)¶

In [24]:
# Polar (radar) chart of the mean sepal length of each species.
# Average the columns per species and restore species as a regular column.
df_radar = (
    df.groupby(by='species')
      .mean()
      .reset_index()
)

# line_close=True joins the last point back to the first
fig15 = px.line_polar(
    data_frame=df_radar,
    r='sepal_length',
    theta='species',
    line_close=True,
)
fig15.show()

# Save the figure as HTML
fig15.write_html("outputs/fig15.html")
In [25]:
# Sunburst on the Titanic dataset, saved as fig17.
# The frame gets its own name so the shared `df` variable is not silently
# switched to a different dataset in the middle of the notebook; px and sns
# are already imported in the first cell, so they are not imported again.
titanic = sns.load_dataset('titanic')

# Ring order from the centre outwards: sex -> class -> who -> alive -> alone.
# Wedges are sized by the `survived` column and coloured by sex.
fig = px.sunburst(titanic,
                  path=['sex', 'class', 'who', 'alive', 'alone'],
                  values='survived', color='sex')
fig.show()

# Save the figure as HTML
fig.write_html("outputs/fig17.html")
In [26]:
# Load the restaurant tips sample dataset, report its shape and preview
# the first two rows
tips = sns.load_dataset(name='tips')
print(tips.shape)
tips.head(n=2)
(244, 7)
Out[26]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
In [27]:
# Sunburst of tip amounts: sex -> smoker -> day -> time, sized by `tip`
# and coloured by sex.
# Original question: how can we add more values when we hover the pointer
# on the plot? The hover_data list below is what passes extra columns in.
fig = px.sunburst(
    data_frame=tips,
    path=['sex', 'smoker', 'day', 'time'],
    values='tip',
    color='sex',
    hover_data=['tip', 'day', 'time'],
)
fig.show()

# Save the figure as HTML
fig.write_html("outputs/fig18.html")
In [28]:
# Switch `df` to the gapminder sample data bundled with plotly, then
# report its shape and preview the first two rows
df = px.data.gapminder()

print(df.shape)
df.head(n=2)
(1704, 8)
Out[28]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4
1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4
In [29]:
# Sunburst of population: continent -> country -> year, sized by `pop`
# and coloured by continent.
# Open question kept from the original cell: how can we add more values
# when we hover the pointer on the plot?
fig = px.sunburst(
    data_frame=df,
    path=['continent', 'country', 'year'],
    values='pop',
    color='continent',
)
fig.show()

# Save the figure as HTML
fig.write_html("outputs/fig19.html")
In [30]:
# Animated bubble chart: GDP per capita (log scale) against life expectancy,
# bubble size = population, colour = continent, one frame per year.
# animation_group="country" ties each bubble to the same country across
# frames. Axis ranges are fixed so the view does not change between frames.
px.scatter(
    data_frame=df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    color="continent",
    animation_frame='year',
    animation_group="country",
    log_x=True,
    size_max=55,
    range_x=[100, 100000],
    range_y=[5, 100],
)
In [31]:
# Variant of the animated bubble chart with one colour per country.
# NOTE(review): animation_group is "continent" here although each row is a
# country -- confirm this swap against the continent-coloured version is
# intentional.
px.scatter(
    data_frame=df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    color="country",
    animation_frame='year',
    animation_group="continent",
    log_x=True,
    size_max=55,
    range_x=[100, 100000],
    range_y=[5, 100],
)
In [33]:
# Build the country-coloured animation again, this time keeping a handle
# on the figure so it can be written to disk.
# NOTE(review): animation_group is "continent" although each row is a
# country -- confirm this is intentional.
fig = px.scatter(
    data_frame=df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    color="country",
    animation_frame='year',
    animation_group="continent",
    log_x=True,
    size_max=55,
    range_x=[100, 100000],
    range_y=[5, 100],
)
fig.show()

# Save the animation as an interactive HTML page
fig.write_html("outputs/gapminder.html")
In [38]:
# Export the gapminder bubble animation as an animated GIF.
# px is already imported in the first cell; only the extra modules this cell
# needs are imported here.
import io

# PIL.Image has to be imported explicitly: a bare `import PIL` does not load
# the Image submodule, so `PIL.Image` resolves only if some other library
# happened to import it first.
from PIL import Image

fig = px.scatter(df, x="gdpPercap",
                 y="lifeExp",
                 size="pop", color="continent",
                 animation_frame='year', animation_group="country",
                 log_x=True, size_max=55, range_x=[100, 100000], range_y=[5, 100])

fig.show()

# Render one PNG per animation frame and collect them as PIL images
frames = []
for s, fr in enumerate(fig.frames):
    # swap the figure's traces for the traces of this frame
    fig.update(data=fr.data)
    # move the slider handle to the matching step
    fig.layout.sliders[0].update(active=s)
    # rasterise the current state (scale=3 gives a higher-resolution image)
    png_bytes = fig.to_image(format="png", scale=3)
    frames.append(Image.open(io.BytesIO(png_bytes)))

# Stitch the images into a GIF: the first image is the base, the remaining
# ones are appended as further frames
frames[0].save(
        "outputs/gapminder.gif",
        save_all=True,
        append_images=frames[1:],
        optimize=True,
        duration=500,  # milliseconds per frame
        loop=0,  # 0 = loop forever
        dither=None  # Turn off dithering
    )

GEO Maps in Python¶

In [39]:
import plotly.express as px

# Election results per district, plus the GeoJSON district outlines that
# ship with plotly
df = px.data.election()

geojson = px.data.election_geojson()

# Print one district label from each source to see that the table's
# `district` values and the GeoJSON `properties.district` values share a format
print(df.loc[2, "district"])
first_feature = geojson["features"][0]
print(first_feature["properties"])
11-Sault-au-Récollet
{'district': '11-Sault-au-Récollet'}
In [41]:
# Preview the first rows of the election results table
df.head()
Out[41]:
district Coderre Bergeron Joly total winner result district_id
0 101-Bois-de-Liesse 2481 1829 3024 7334 Joly plurality 101
1 102-Cap-Saint-Jacques 2525 1163 2675 6363 Joly plurality 102
2 11-Sault-au-Récollet 3348 2770 2532 8650 Coderre plurality 11
3 111-Mile-End 1734 4782 2514 9030 Bergeron majority 111
4 112-DeLorimier 1770 5933 3044 10747 Bergeron majority 112
In [43]:
# Choropleth on a map tile layer: each district is shaded by its `Coderre`
# value. Table rows are matched to GeoJSON features by comparing the
# `district` column with each feature's properties.district.
fig = px.choropleth_mapbox(
    data_frame=df,
    geojson=geojson,
    color="Coderre",
    locations="district",
    featureidkey="properties.district",
    center={"lat": 45.5517, "lon": -73.7073},
    mapbox_style="carto-positron",
    zoom=9,
)
# Remove the margins around the map
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

# Save the figure as HTML
fig.write_html("outputs/fig20.html")
In [44]:
# Small hand-written table: five countries and their populations
countries = ['China', 'India', 'United States', 'Indonesia', 'Pakistan']
populations = [1444216107, 1393409038, 332915073, 276361783, 225199937]
data = {
    'Country': countries,
    'Population': populations,
}

# Turn the column dictionary into a DataFrame and display it
df = pd.DataFrame(data=data)
df
Out[44]:
Country Population
0 China 1444216107
1 India 1393409038
2 United States 332915073
3 Indonesia 276361783
4 Pakistan 225199937
In [47]:
# Read the COVID CSV from the local DATA folder, report its shape and
# preview the first two rows
df = pd.read_csv(filepath_or_buffer="DATA/covid-data.csv")
print(df.shape)
df.head(n=2)
(302512, 67)
Out[47]:
iso_code continent location date total_cases new_cases new_cases_smoothed total_deaths new_deaths new_deaths_smoothed total_cases_per_million new_cases_per_million new_cases_smoothed_per_million total_deaths_per_million new_deaths_per_million new_deaths_smoothed_per_million reproduction_rate icu_patients icu_patients_per_million hosp_patients hosp_patients_per_million weekly_icu_admissions weekly_icu_admissions_per_million weekly_hosp_admissions weekly_hosp_admissions_per_million total_tests new_tests total_tests_per_thousand new_tests_per_thousand new_tests_smoothed new_tests_smoothed_per_thousand positive_rate tests_per_case tests_units total_vaccinations people_vaccinated people_fully_vaccinated total_boosters new_vaccinations new_vaccinations_smoothed total_vaccinations_per_hundred people_vaccinated_per_hundred people_fully_vaccinated_per_hundred total_boosters_per_hundred new_vaccinations_smoothed_per_million new_people_vaccinated_smoothed new_people_vaccinated_smoothed_per_hundred stringency_index population_density median_age aged_65_older aged_70_older gdp_per_capita extreme_poverty cardiovasc_death_rate diabetes_prevalence female_smokers male_smokers handwashing_facilities hospital_beds_per_thousand life_expectancy human_development_index population excess_mortality_cumulative_absolute excess_mortality_cumulative excess_mortality excess_mortality_cumulative_per_million
0 AFG Asia Afghanistan 2020-01-03 NaN 0.0 NaN NaN 0.0 NaN NaN 0.0 NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 54.422 18.6 2.581 1.337 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511 41128772.0 NaN NaN NaN NaN
1 AFG Asia Afghanistan 2020-01-04 NaN 0.0 NaN NaN 0.0 NaN NaN 0.0 NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 54.422 18.6 2.581 1.337 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511 41128772.0 NaN NaN NaN NaN
In [48]:
# Average the total_cases column per location, keeping `location` as a
# regular column.
# NOTE(review): total_cases looks like a running cumulative count, so this
# is a time-average rather than a final total -- confirm that is intended.
df_total_cases = (
    df.groupby(by='location')['total_cases']
      .mean()
      .reset_index()
)
df_total_cases
Out[48]:
location total_cases
0 Afghanistan 1.198330e+05
1 Africa 7.240734e+06
2 Albania 1.734666e+05
3 Algeria 1.678651e+05
4 American Samoa 5.085308e+03
... ... ...
250 Western Sahara NaN
251 World 2.910590e+08
252 Yemen 7.537913e+03
253 Zambia 1.850571e+05
254 Zimbabwe 1.334758e+05

255 rows × 2 columns

In [49]:
# World choropleth shaded by the per-location total_cases value.
# Locations are matched by country name; the colour scale is capped at
# 10 million.
fig = px.choropleth(
    data_frame=df_total_cases,
    locations='location',
    locationmode='country names',
    color='total_cases',
    title='Total Cases by Country',
    range_color=[0, 10000000],
)
fig.show()

# Save the figure as HTML
fig.write_html("outputs/fig21.html")
In [50]:
# Animated world choropleth of daily new cases, one frame per date.
# The title names the column that is actually plotted (new_cases).
px.choropleth(df, locations='location', locationmode='country names', color='new_cases',
              title='New Cases by Country', range_color=[0, 10000],
              animation_frame='date')

Assignment: Animate this graph using monthly data: convert the data to monthly data, animate the plot, and save it as an AVI, MP4, or GIF file.¶

In [51]:
# Collapse the daily records into one row per (month, location).

# Parse the date strings so the .dt accessor is available
df['date'] = pd.to_datetime(df['date'])

# Monthly period label used as the grouping key
df['year_month'] = df['date'].dt.to_period('M')

# total_cases is a running cumulative count (the file carries a separate
# new_cases column for daily increments), so adding up its daily values would
# count earlier cases again on every day of the month. The largest value seen
# in a month is the cumulative total reached by the end of that month.
monthly_cases = (
    df.groupby(['year_month', 'location'])
      .agg({'total_cases': 'max'})
      .reset_index()
)

# Months without any reported figure are shown as 0 rather than left blank
monthly_cases['total_cases'] = monthly_cases['total_cases'].fillna(0)

# Store the period as a plain 'YYYY-MM' string so it can label animation frames
monthly_cases['year_month'] = monthly_cases['year_month'].astype(str)

# One row per month and location, holding the cumulative total for that month
monthly_cases.head()
Out[51]:
year_month location total_cases
0 2020-01 Afghanistan 0.0
1 2020-01 Africa 0.0
2 2020-01 Albania 0.0
3 2020-01 Algeria 0.0
4 2020-01 American Samoa 0.0
In [52]:
# Animated world choropleth of the monthly figures, one frame per month.

# Take the period shown in the title from the data instead of hard-coding it.
# 'YYYY-MM' strings sort chronologically, so min/max give the first and last month.
first_month = monthly_cases['year_month'].min()
last_month = monthly_cases['year_month'].max()

fig = px.choropleth(monthly_cases, locations='location', locationmode='country names', color='total_cases',
                    title=f'Total Covid Cases Worldwide ({first_month} -to- {last_month})',
                    # cap the colour scale at the 85th percentile so a few very
                    # large values do not wash out the rest of the map
                    range_color=[0, monthly_cases['total_cases'].quantile(0.85)],
                    animation_frame='year_month', color_continuous_scale='viridis',
                    labels={'year_month': 'Year-Month', 'total_cases': 'Total Cases'}
                    )
# increase the size of the map
fig.update_layout(height=600, width=800)
In [53]:
# Save the monthly choropleth animation as HTML and as an animated GIF.
# px is already imported in the first cell; only the extra modules this cell
# needs are imported here.
import io

# PIL.Image has to be imported explicitly: a bare `import PIL` does not load
# the Image submodule.
from PIL import Image

# Take the period shown in the title from the data instead of hard-coding it.
# 'YYYY-MM' strings sort chronologically, so min/max give the first and last month.
first_month = monthly_cases['year_month'].min()
last_month = monthly_cases['year_month'].max()

fig = px.choropleth(monthly_cases, locations='location', locationmode='country names', color='total_cases',
                    title=f'Total Covid Cases Worldwide ({first_month} -to- {last_month})',
                    range_color=[0, monthly_cases['total_cases'].quantile(0.85)],
                    animation_frame='year_month', color_continuous_scale='viridis',
                    labels={'year_month': 'Year-Month', 'total_cases': 'Total Cases'}
                    )

fig.show()
fig.write_html("outputs/fig22.html")

# increase the size of the map
fig.update_layout(height=600, width=800)

# Render one PNG per animation frame and collect them as PIL images
frames = []
for s, fr in enumerate(fig.frames):
    # swap the figure's traces for the traces of this frame
    fig.update(data=fr.data)
    # move the slider handle to the matching step
    fig.layout.sliders[0].update(active=s)
    # rasterise the current state
    png_bytes = fig.to_image(format="png", scale=3)
    frames.append(Image.open(io.BytesIO(png_bytes)))

# Stitch the images into a GIF, written next to the other artefacts in outputs/
frames[0].save(
        "outputs/total_covid_cases_worldwide.gif",
        save_all=True,
        append_images=frames[1:],
        optimize=True,
        duration=500,  # milliseconds per frame
        loop=0,  # infinite loop
        dither=None  # Turn off dithering
    )
In [ ]: